Downloading US Census Data from API

This notebook shows how to download all block group data for a set of variables. You can change it to download different geographies.

It uses three external libraries:

  1. Sunlight Labs Census Wrapper - Joe's Fork. Note: block groups will not work with the version installed by `pip install census`, but other geographies may.
  2. US States Library
  3. Pandas

In [1]:
import os

# may need to point this to your census module
from us_census_api.core import Census
# Download from pypi
from us import states

from pandas import DataFrame
import pandas as pd

In [2]:
# Read the key from the CENSUS_API_KEY environment variable when it is set, so
# a real key never has to be saved inside the notebook. The original
# placeholder is kept as the fallback value.
census_API_key = os.environ.get('CENSUS_API_KEY', 'API_KEY_HERE')

# API client used by every request below, configured with year=2010.
c = Census(census_API_key, year=2010)

In [4]:
# Smoke test: request only NAME and GEOID for state 36, county '5',
# all block groups, and preview the first rows.
data = pd.DataFrame(
    c.acs5.state_county_blockgroup(
        ('NAME', 'GEOID'),
        36,
        '5',
        Census.ALL,
    )
)
data.head()


Out[4]:
GEOID NAME block group county state tract
0 15000US360050001000 Block Group 0, Census Tract 1, Bronx County, N... 0 005 36 000100
1 15000US360050001001 Block Group 1, Census Tract 1, Bronx County, N... 1 005 36 000100
2 15000US360050002000 Block Group 0, Census Tract 2, Bronx County, N... 0 005 36 000200
3 15000US360050002001 Block Group 1, Census Tract 2, Bronx County, N... 1 005 36 000200
4 15000US360050002002 Block Group 2, Census Tract 2, Bronx County, N... 2 005 36 000200

In [12]:
# Variable list: B15002_* codes mapped to readable labels.
var_edu = {
    'GEOID': 'GeoId',
    'B15002_001E': 'Edu Total',
    'B15002_002E': 'Edu Male Total',
}

# Each sex contributes 16 consecutive codes: male 003-018, female 020-035.
# Code 019 is not part of either range.
for sex, first_code in (('Male', 3), ('Female', 20)):
    for level in range(16):
        code = 'B15002_{:03d}E'.format(first_code + level)
        var_edu[code] = 'Edu {} Level {}'.format(sex, level)

# Tuple of the codes, in insertion order, passed to the API calls below.
var_edu_tup = tuple(var_edu)

In [13]:
# Display the code -> label mapping built in the previous cell.
var_edu


Out[13]:
{'B15002_001E': 'Edu Total',
 'B15002_002E': 'Edu Male Total',
 'B15002_003E': 'Edu Male Level 0',
 'B15002_004E': 'Edu Male Level 1',
 'B15002_005E': 'Edu Male Level 2',
 'B15002_006E': 'Edu Male Level 3',
 'B15002_007E': 'Edu Male Level 4',
 'B15002_008E': 'Edu Male Level 5',
 'B15002_009E': 'Edu Male Level 6',
 'B15002_010E': 'Edu Male Level 7',
 'B15002_011E': 'Edu Male Level 8',
 'B15002_012E': 'Edu Male Level 9',
 'B15002_013E': 'Edu Male Level 10',
 'B15002_014E': 'Edu Male Level 11',
 'B15002_015E': 'Edu Male Level 12',
 'B15002_016E': 'Edu Male Level 13',
 'B15002_017E': 'Edu Male Level 14',
 'B15002_018E': 'Edu Male Level 15',
 'B15002_020E': 'Edu Female Level 0',
 'B15002_021E': 'Edu Female Level 1',
 'B15002_022E': 'Edu Female Level 2',
 'B15002_023E': 'Edu Female Level 3',
 'B15002_024E': 'Edu Female Level 4',
 'B15002_025E': 'Edu Female Level 5',
 'B15002_026E': 'Edu Female Level 6',
 'B15002_027E': 'Edu Female Level 7',
 'B15002_028E': 'Edu Female Level 8',
 'B15002_029E': 'Edu Female Level 9',
 'B15002_030E': 'Edu Female Level 10',
 'B15002_031E': 'Edu Female Level 11',
 'B15002_032E': 'Edu Female Level 12',
 'B15002_033E': 'Edu Female Level 13',
 'B15002_034E': 'Edu Female Level 14',
 'B15002_035E': 'Edu Female Level 15',
 'GEOID': 'GeoId'}

In [14]:
# Same smoke test as before, but requesting the full variable tuple
# for state 36, county '5', all block groups.
data = pd.DataFrame(
    c.acs5.state_county_blockgroup(
        var_edu_tup,
        36,
        '5',
        Census.ALL,
    )
)
data.head()


Out[14]:
B15002_001E B15002_002E B15002_003E B15002_004E B15002_005E B15002_006E B15002_007E B15002_008E B15002_009E B15002_010E ... B15002_031E B15002_032E B15002_033E B15002_034E B15002_035E GEOID block group county state tract
0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 15000US360050001000 0 005 36 000100
1 7943 6931 102 70 260 278 438 1025 1366 1139 ... 19 45 6 0 8 15000US360050001001 1 005 36 000100
2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 15000US360050002000 0 005 36 000200
3 454 194 0 0 0 74 0 0 0 0 ... 40 7 0 0 0 15000US360050002001 1 005 36 000200
4 1152 563 0 0 0 0 0 14 0 12 ... 29 95 45 0 0 15000US360050002002 2 005 36 000200

5 rows × 39 columns


In [18]:
# Maps each state to the list of block group rows downloaded for it;
# filled in by the download loop in the next cell.
bkgps = {}

In [19]:
# THIS DOWNLOADS ALL THE DATA!
# It can take a while.
#
# The loop is resumable: a state is stored in bkgps only after every one of
# its counties has been fetched, and states already present in bkgps are
# skipped. If a request fails part-way through, just re-run this cell and it
# continues with the first state that is still missing. To force a full
# re-download, reset bkgps to an empty dict first.

for state in states.STATES:
    if state in bkgps:
        # Already fully downloaded by an earlier run of this cell.
        continue
    print(state)

    # One record per county in the state; only its 'county' field is used below.
    counties = c.acs5.state_county('NAME', state.fips, Census.ALL)
    data = []
    for county in counties:
        county_num = county['county']
        data.extend(c.acs5.state_county_blockgroup(var_edu_tup, state.fips, county_num, Census.ALL))
    # Store only complete states so a failure never leaves partial data behind.
    bkgps[state] = data


Alabama
Alaska
Arizona
Arkansas
California
Colorado
Connecticut
Delaware
District of Columbia
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
Montana
Nebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virginia
Wisconsin
Wyoming
Out[19]:
B15002_001E B15002_002E B15002_003E B15002_004E B15002_005E B15002_006E B15002_007E B15002_008E B15002_009E B15002_010E ... B15002_031E B15002_032E B15002_033E B15002_034E B15002_035E GEOID block group county state tract
0 920 388 7 0 1 24 11 5 24 30 ... 28 54 10 0 0 15000US480019501001 1 001 48 950100
1 1560 907 0 11 43 11 25 61 97 15 ... 37 77 6 0 3 15000US480019501002 2 001 48 950100
2 1251 745 0 31 19 30 0 3 17 0 ... 14 25 8 0 0 15000US480019501003 3 001 48 950100
3 6876 6843 28 66 125 571 477 634 794 64 ... 8 0 0 0 0 15000US480019504011 1 001 48 950401
4 3574 3507 28 0 23 271 223 249 351 82 ... 16 0 0 0 0 15000US480019504021 1 001 48 950402

5 rows × 39 columns


In [ ]:
# Flatten the per-state row lists into one list, then build a single frame
# and preview its first rows.
bkgpsDF = [row for state_rows in bkgps.values() for row in state_rows]
bkgpDF = pd.DataFrame(bkgpsDF)
bkgpDF.head()

In [20]:
# Destination for the combined table. Set the CENSUS_EDU_CSV environment
# variable to write somewhere else; the original absolute path is kept as the
# default so existing behaviour is unchanged when the variable is unset.
OUTPUT_CSV = os.environ.get(
    'CENSUS_EDU_CSV',
    '/Users/joe/Dropbox/SFI_CensusData/UnitedStates/2010acs_edu.csv',
)

# index=False leaves the DataFrame's row index out of the file.
bkgpDF.to_csv(OUTPUT_CSV, index=False)

In [ ]: